부동산 문제 풀이
- 1단계 데이터 전처리
- 2단계 탐색적 자료 분석
- 2-1단계 시계열 분석
- 2-2단계 맵지도 분석
- 3단계 기계학습 예측 (TBD)
- 4단계 외부데이터 조인 (TBD)
#윈도우 10 64bit, R 3.4.2 버전
#설치 라이브러리, Rtools 설치 필요 (관리자 권한으로 설치)
#install.packages("tidyverse")
#install.packages("readxl")
#install.packages('DT')
#install.packages('glue')
#install.packages('glue')
#install.packages("htmlwidgets")
#install.packages("plotly")
#install.packages('ggmap')
#install.packages("leaflet")
#install.packages("leaflet")
#install.packages("leaflet.minicharts")
library(tidyverse)
## -- Attaching packages --------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1.9000 √ purrr 0.2.4
## √ tibble 1.3.4 √ dplyr 0.7.4
## √ tidyr 0.7.2 √ stringr 1.2.0
## √ readr 1.1.1 √ forcats 0.2.0
## -- Conflicts ------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(DT)
library(htmlwidgets)
library(sp)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggmap)
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(leaflet)
library(leaflet.minicharts)
library(htmltools)
1단계 데이터 전처리 (1)
setwd("C:/budong/data")
files <- list.files(pattern=".xls")
DF <- NULL
files
## [1] "2010년_01월_전국_실거래가_아파트(매매).xls"
## [2] "2010년_02월_전국_실거래가_아파트(매매).xls"
## [3] "2010년_03월_전국_실거래가_아파트(매매).xls"
## [4] "2010년_04월_전국_실거래가_아파트(매매).xls"
## [5] "2010년_05월_전국_실거래가_아파트(매매).xls"
## [6] "2010년_06월_전국_실거래가_아파트(매매).xls"
## [7] "2010년_07월_전국_실거래가_아파트(매매).xls"
## [8] "2010년_08월_전국_실거래가_아파트(매매).xls"
## [9] "2010년_09월_전국_실거래가_아파트(매매).xls"
## [10] "2010년_10월_전국_실거래가_아파트(매매).xls"
## [11] "2010년_11월_전국_실거래가_아파트(매매).xls"
## [12] "2010년_12월_전국_실거래가_아파트(매매).xls"
## [13] "2011년_01월_전국_실거래가_아파트(매매).xls"
## [14] "2011년_02월_전국_실거래가_아파트(매매).xls"
## [15] "2011년_03월_전국_실거래가_아파트(매매).xls"
## [16] "2011년_04월_전국_실거래가_아파트(매매).xls"
## [17] "2011년_05월_전국_실거래가_아파트(매매).xls"
## [18] "2011년_06월_전국_실거래가_아파트(매매).xls"
## [19] "2011년_07월_전국_실거래가_아파트(매매).xls"
## [20] "2011년_08월_전국_실거래가_아파트(매매).xls"
## [21] "2011년_09월_전국_실거래가_아파트(매매).xls"
## [22] "2011년_10월_전국_실거래가_아파트(매매).xls"
## [23] "2011년_11월_전국_실거래가_아파트(매매).xls"
## [24] "2011년_12월_전국_실거래가_아파트(매매).xls"
## [25] "2012년_01월_전국_실거래가_아파트(매매).xls"
## [26] "2012년_02월_전국_실거래가_아파트(매매).xls"
## [27] "2012년_03월_전국_실거래가_아파트(매매).xls"
## [28] "2012년_04월_전국_실거래가_아파트(매매).xls"
## [29] "2012년_05월_전국_실거래가_아파트(매매).xls"
## [30] "2012년_06월_전국_실거래가_아파트(매매).xls"
## [31] "2012년_07월_전국_실거래가_아파트(매매).xls"
## [32] "2012년_08월_전국_실거래가_아파트(매매).xls"
## [33] "2012년_09월_전국_실거래가_아파트(매매).xls"
## [34] "2012년_10월_전국_실거래가_아파트(매매).xls"
## [35] "2012년_11월_전국_실거래가_아파트(매매).xls"
## [36] "2012년_12월_전국_실거래가_아파트(매매).xls"
## [37] "2013년_01월_전국_실거래가_아파트(매매).xls"
## [38] "2013년_02월_전국_실거래가_아파트(매매).xls"
## [39] "2013년_03월_전국_실거래가_아파트(매매).xls"
## [40] "2013년_04월_전국_실거래가_아파트(매매).xls"
## [41] "2013년_05월_전국_실거래가_아파트(매매).xls"
## [42] "2013년_06월_전국_실거래가_아파트(매매).xls"
## [43] "2013년_07월_전국_실거래가_아파트(매매).xls"
## [44] "2013년_08월_전국_실거래가_아파트(매매).xls"
## [45] "2013년_09월_전국_실거래가_아파트(매매).xls"
## [46] "2013년_10월_전국_실거래가_아파트(매매).xls"
## [47] "2013년_11월_전국_실거래가_아파트(매매).xls"
## [48] "2013년_12월_전국_실거래가_아파트(매매).xls"
## [49] "2014년_01월_전국_실거래가_아파트(매매).xls"
## [50] "2014년_02월_전국_실거래가_아파트(매매).xls"
## [51] "2014년_03월_전국_실거래가_아파트(매매).xls"
## [52] "2014년_04월_전국_실거래가_아파트(매매).xls"
## [53] "2014년_05월_전국_실거래가_아파트(매매).xls"
## [54] "2014년_06월_전국_실거래가_아파트(매매).xls"
## [55] "2014년_07월_전국_실거래가_아파트(매매).xls"
## [56] "2014년_08월_전국_실거래가_아파트(매매).xls"
## [57] "2014년_09월_전국_실거래가_아파트(매매).xls"
## [58] "2014년_10월_전국_실거래가_아파트(매매).xls"
## [59] "2014년_11월_전국_실거래가_아파트(매매).xls"
## [60] "2014년_12월_전국_실거래가_아파트(매매).xls"
## [61] "2015년_01월_전국_실거래가_아파트(매매).xls"
## [62] "2015년_02월_전국_실거래가_아파트(매매).xls"
## [63] "2015년_03월_전국_실거래가_아파트(매매).xls"
## [64] "2015년_04월_전국_실거래가_아파트(매매).xls"
## [65] "2015년_05월_전국_실거래가_아파트(매매).xls"
## [66] "2015년_06월_전국_실거래가_아파트(매매).xls"
## [67] "2015년_07월_전국_실거래가_아파트(매매).xls"
## [68] "2015년_08월_전국_실거래가_아파트(매매).xls"
## [69] "2015년_09월_전국_실거래가_아파트(매매).xls"
## [70] "2015년_10월_전국_실거래가_아파트(매매).xls"
## [71] "2015년_11월_전국_실거래가_아파트(매매).xls"
## [72] "2015년_12월_전국_실거래가_아파트(매매).xls"
## [73] "2016년_01월_전국_실거래가_아파트(매매).xls"
## [74] "2016년_02월_전국_실거래가_아파트(매매).xls"
## [75] "2016년_03월_전국_실거래가_아파트(매매).xls"
## [76] "2016년_04월_전국_실거래가_아파트(매매).xls"
## [77] "2016년_05월_전국_실거래가_아파트(매매).xls"
## [78] "2016년_06월_전국_실거래가_아파트(매매).xls"
## [79] "2016년_07월_전국_실거래가_아파트(매매).xls"
## [80] "2016년_08월_전국_실거래가_아파트(매매).xls"
## [81] "2016년_09월_전국_실거래가_아파트(매매).xls"
## [82] "2016년_10월_전국_실거래가_아파트(매매).xls"
## [83] "2016년_11월_전국_실거래가_아파트(매매).xls"
## [84] "2016년_12월_전국_실거래가_아파트(매매).xls"
for (f in files) {
year_month <- paste0(substr(f, 1, 4), substr(f, 7, 8))
dat <- read_excel(f, skip=7, sheet = "서울특별시")
dat <- cbind(dat, contractyear = year_month)
DF <- rbind(DF, dat)
cat('------', dim(DF), '------', '\t')
}
## ------ 4859 10 ------ ------ 8649 10 ------ ------ 12612 10 ------ ------ 15221 10 ------ ------ 17342 10 ------ ------ 19485 10 ------ ------ 21669 10 ------ ------ 23976 10 ------ ------ 26771 10 ------ ------ 31640 10 ------ ------ 37790 10 ------ ------ 44159 10 ------ ------ 51301 10 ------ ------ 57300 10 ------ ------ 62687 10 ------ ------ 66698 10 ------ ------ 70495 10 ------ ------ 74153 10 ------ ------ 78436 10 ------ ------ 83109 10 ------ ------ 87272 10 ------ ------ 91276 10 ------ ------ 94774 10 ------ ------ 98443 10 ------ ------ 101191 10 ------ ------ 105095 10 ------ ------ 109147 10 ------ ------ 112534 10 ------ ------ 115974 10 ------ ------ 118516 10 ------ ------ 120643 10 ------ ------ 122825 10 ------ ------ 126160 10 ------ ------ 131004 10 ------ ------ 135297 10 ------ ------ 139012 10 ------ ------ 142416 10 ------ ------ 146829 10 ------ ------ 153356 10 ------ ------ 160022 10 ------ ------ 166330 10 ------ ------ 171424 10 ------ ------ 174733 10 ------ ------ 179995 10 ------ ------ 187224 10 ------ ------ 194981 10 ------ ------ 200239 10 ------ ------ 206713 10 ------ ------ 213923 10 ------ ------ 222756 10 ------ ------ 230763 10 ------ ------ 236177 10 ------ ------ 241011 10 ------ ------ 246283 10 ------ ------ 252740 10 ------ ------ 261651 10 ------ ------ 271418 10 ------ ------ 279818 10 ------ ------ 285765 10 ------ ------ 292031 10 ------ ------ 301565 10 ------ ------ 311339 10 ------ ------ 326796 10 ------ ------ 338647 10 ------ ------ 348796 10 ------ ------ 359565 10 ------ ------ 370585 10 ------ ------ 379313 10 ------ ------ 389145 10 ------ ------ 399764 10 ------ ------ 406975 10 ------ ------ 411867 10 ------ ------ 416847 10 ------ ------ 422167 10 ------ ------ 430980 10 ------ ------ 441360 10 ------ ------ 452218 10 ------ ------ 465511 10 ------ ------ 477350 10 ------ ------ 488377 10 ------ ------ 499717 10 ------ ------ 511932 10 ------ ------ 517348 10 ------ ------ 519346 10 ------
setwd("C:/budong/data/2017")
files <- list.files(pattern=".xlsx")
DF2 <- NULL
for (f in files) {
dat2 <- read_excel(f)
DF2 <- rbind(DF2, dat2)
cat('------', dim(DF2), '------', '\t')
}
## ------ 3743 12 ------ ------ 9485 12 ------ ------ 18094 12 ------ ------ 27499 12 ------ ------ 42306 12 ------ ------ 55181 12 ------ ------ 70153 12 ------ ------ 75280 12 ------ ------ 81932 12 ------ ------ 84602 12 ------
dim(DF)
## [1] 519346 10
dim(DF2)
## [1] 84602 12
colnames(DF) <- c('municipality', 'address', 'complex', 'exclusivearea', 'contractdate', 'transactionamount', 'floors', 'construction', 'roadname', 'contractyear')
colnames(DF2) <- c('municipality', 'address', 'home', 'abundance', 'complex', 'exclusivearea', 'contractyear', 'contractdate', 'transactionamount', 'floors', 'construction', 'roadname')
df_colnames <- colnames(DF)
DF_all <- rbind(DF, DF2[,df_colnames])
dim(DF_all)
## [1] 603948 10
#save(DF_all, file='DF_all.Rdata')
#write.csv(DF_all, file='DF_all.csv', row.names=FALSE)
1단계 데이터 전처리 (2)
- 데이터 구조 확인
- 시, 구, 동으로 변수 구분 및 전용 면적 및 변수 전처리
glimpse(DF_all)
## Observations: 603,948
## Variables: 10
## $ municipality <chr> "서울특별시 강남구 개포동", "서울특별시 강남구 개포동", "서울특별시 강남구...
## $ address <chr> "658-1", "141", "141", "141", "141", "141", ...
## $ complex <chr> "개포6차우성아파트1동~8동", "개포주공 1단지", "개포주공 1단지", "개...
## $ exclusivearea <chr> "54.98", "35.44", "35.64", "35.64", "41.98",...
## $ contractdate <chr> "21~31", "11~20", "1~10", "11~20", "11~20", ...
## $ transactionamount <chr> "66,000", "75,000", "72,400", "75,000", "83,...
## $ floors <chr> "3", "1", "5", "1", "1", "5", "4", "4", "1",...
## $ construction <chr> "1987", "1982", "1982", "1982", "1982", "198...
## $ roadname <chr> "언주로", "개포로", "개포로", "개포로", "개포로", "개포로", "개...
## $ contractyear <fctr> 201001, 201001, 201001, 201001, 201001, 201...
DF_all <- DF_all %>% mutate(
location_1 = sapply(strsplit(DF_all$municipality, ' '), "[", 1),
location_2 = sapply(strsplit(DF_all$municipality, ' '), "[", 2),
location_3 = sapply(strsplit(DF_all$municipality, ' '), "[", 3),
exclusivearea = as.numeric(exclusivearea),
exclusivearea_section = case_when(exclusivearea < 67 ~ '0~66',
exclusivearea >= 67 & exclusivearea < 100 ~ '67~100',
exclusivearea >= 100 & exclusivearea < 132 ~ '100~131',
exclusivearea >= 132 & exclusivearea < 165 ~ '132~165',
exclusivearea >= 165 & exclusivearea < 198 ~ '165~197',
exclusivearea >= 198 & exclusivearea < 330 ~ '198~329',
exclusivearea >= 330 ~ '330이상'),
transactionamount = as.numeric(gsub(',', '', transactionamount)),
floors = as.integer(floors),
construction = as.integer(construction),
roadname = as.factor(roadname),
contractyear = ymd(paste0(contractyear,ifelse(str_sub(contractdate, 1, 2) == '1~', '01', str_sub(contractdate, 1, 2))))
)
2단계 탐색적 자료 분석
2-1단계 시계열 분석 (1)
- exclusivearea 별 매매가 분포 확인
exclusivearea_price <- DF_all %>% filter(location_1 == '서울특별시') %>%
group_by(exclusivearea_section) %>%
summarise(
count = n(),
price_mean = mean(transactionamount, na.rm=T),
price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
price_stdev = sd(transactionamount, na.rm=T)
)
datatable(exclusivearea_price)
2단계 탐색적 자료 분석
2-1단계 시계열 분석 (2)
- exclusivearea 별 매매가 분포 확인
price <- DF_all %>% filter(location_1 == '서울특별시') %>%
group_by(contractyear , location_2) %>%
summarise(
count = n(),
price_mean = mean(transactionamount, na.rm=T),
price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
price_stdev = sd(transactionamount, na.rm=T)
)
datatable(price)
2단계 탐색적 자료 분석
2-1단계 시계열 분석 (3)
ggplotly(
price %>% filter (count >= 100) %>%
ggplot(aes(contractyear , price_50, colour=location_2, group=location_2, fill=location_2)) + geom_line(alpha=1, size=2) +
xlab("") + ylab("가격 중앙값") +
theme_bw() + ggtitle("거래가격 중앙값(만원 단위)") +
theme(plot.title = element_text(size=24,face="bold")) +
theme(axis.text=element_text(size=14), axis.title=element_text(size=24,face="bold")) +
theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
hjust = 0.5, vjust = 0.5)) +
theme(legend.text = element_text(colour="black", size = 7)))
2단계 탐색적 자료 분석
2-1단계 시계열 분석 (3)
- 0~66 전용 면젹, 구별 transactionamount 중앙값 확인
price2 <- DF_all %>% filter(location_1 == '서울특별시' & exclusivearea_section == '0~66' ) %>%
group_by(contractyear , location_2) %>%
summarise(
count = n(),
price_mean = mean(transactionamount, na.rm=T),
price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
price_stdev = sd(transactionamount, na.rm=T)
)
ggplotly(
price2 %>% filter (count >= 30) %>%
ggplot(aes(contractyear , price_50, colour=location_2, group=location_2, fill=location_2)) + geom_line(alpha=1, size=1) +
xlab("") + ylab("가격 중앙값") +
theme_bw() + ggtitle("거래가격 중앙값(만원 단위)") +
theme(plot.title = element_text(size=24,face="bold")) +
theme(axis.text=element_text(size=14), axis.title=element_text(size=24,face="bold")) +
theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
hjust = 0.5, vjust = 0.5)) +
theme(legend.text = element_text(colour="black", size = 7)))
2단계 탐색적 자료 분석
2-1단계 시계열 분석 (4)
- 전용 면적 section 별 거래 매매가 중앙값 확인
exclusivearea_price <- DF_all %>% filter(location_1 == '서울특별시') %>%
group_by(contractyear , exclusivearea_section) %>%
summarise(
count = n(),
price_mean = mean(transactionamount, na.rm=T),
price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
price_stdev = sd(transactionamount, na.rm=T)
)
ggplotly(
exclusivearea_price %>% filter (count >= 50) %>%
ggplot(aes(contractyear , price_50, colour=exclusivearea_section, group=exclusivearea_section, fill=exclusivearea_section)) + geom_line(alpha=1, size=1) +
xlab("") + ylab("가격 중앙값") +
theme_bw() + ggtitle("거래가격 중앙값(만원 단위)") +
theme(plot.title = element_text(size=24,face="bold")) +
theme(axis.text=element_text(size=14), axis.title=element_text(size=24,face="bold")) +
theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
hjust = 0.5, vjust = 0.5)) +
theme(legend.text = element_text(colour="black", size = 7)))
2단계 탐색적 자료 분석
2-1단계 시계열 분석 (5)
- 특정 지역 전용 면적 section 별 거래 매매가 중앙값 확인
exclusivearea_location2_price <- DF_all %>% filter(location_1 == '서울특별시') %>%
group_by(contractyear , exclusivearea_section, location_2) %>%
summarise(
count = n(),
price_mean = mean(transactionamount, na.rm=T),
price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
price_stdev = sd(transactionamount, na.rm=T)
)
select_location <- c('강남구', '서초구', '종로구', '영등포구', '강서구', '마포구', '관악구', '강북구')
select_section <- c('0~66', '67~100')
ggplotly(
exclusivearea_location2_price %>% filter (count >= 30 & location_2 %in% select_location & exclusivearea_section %in% select_section) %>%
ggplot(aes(contractyear , price_50, colour=location_2, group=location_2, fill=location_2)) + geom_line(alpha=1, size=1) +
xlab("") + ylab("가격 중앙값") +
theme_bw() + ggtitle("거래가격 중앙값(만원 단위)") +
theme(plot.title = element_text(size=24,face="bold")) + facet_grid(.~exclusivearea_section) +
theme(axis.text=element_text(size=14), axis.title=element_text(size=24,face="bold")) +
theme(strip.text.y = element_text(colour = "black", face='bold', angle = 45, size = 20,
hjust = 0.5, vjust = 0.5)) +
theme(legend.text = element_text(colour="black", size = 7)))
2단계 탐색적 자료 분석
2-2단계 맵지도 분석 (1)
- 구글 맵 사용, 좌표는 시/구/동 기준으로 구글 맵에서 검색하며 찾음
- 검색이 데스크탑에서는 데이터 확보가 느려서 1천개만 샘플링
- 여기에서는 load 함수로 계산된 좌표 값 로딩
#1천개 샘플링, 주소를 좌표계로 변환하는 과정이 개별 검색해서 좌표 값 얻어내므로 느림
#DF_all2 <- DF_all[1:1000,]
#DF_address <- as.data.frame(DF_all2[,'municipality'])
#colnames(DF_address) <- 'municipality'
#DF_address$municipality <- enc2utf8(as.character(DF_address$municipality))
#DF_address2 <- mutate_geocode(DF_address, municipality, source = 'google')
#DF_all2 <- DF_all2 %>% mutate(lon = DF_address2$lon, lat = DF_address2$lat)
#save(DF_all2, file='DF_all2.Rdata')
#write.csv(DF_all2, file='DF_all2.csv', row.names=FALSE)
setwd("C:/budong/data/2017")
load('DF_all2.Rdata')
seoul_lonlat = unlist(geocode('seoul', source='google'))
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=seoul&sensor=false
head(DF_all2)
## 시군구 번지 단지명 전용면적 계약일
## 1 서울특별시 강남구 개포동 658-1 개포6차우성아파트1동~8동 54.98 21~31
## 2 서울특별시 강남구 개포동 141 개포주공 1단지 35.44 11~20
## 3 서울특별시 강남구 개포동 141 개포주공 1단지 35.64 1~10
## 4 서울특별시 강남구 개포동 141 개포주공 1단지 35.64 11~20
## 5 서울특별시 강남구 개포동 141 개포주공 1단지 41.98 11~20
## 6 서울특별시 강남구 개포동 141 개포주공 1단지 41.98 11~20
## 거래금액 층 건축년도 도로명 계약년월 location_1 location_2 location_3
## 1 66000 3 1987 언주로 2010-01-21 서울특별시 강남구 개포동
## 2 75000 1 1982 개포로 2010-01-11 서울특별시 강남구 개포동
## 3 72400 5 1982 개포로 2010-01-01 서울특별시 강남구 개포동
## 4 75000 1 1982 개포로 2010-01-11 서울특별시 강남구 개포동
## 5 83000 1 1982 개포로 2010-01-11 서울특별시 강남구 개포동
## 6 85000 5 1982 개포로 2010-01-11 서울특별시 강남구 개포동
## 전용면적_구간 lon lat
## 1 0~66 127.0609 37.47896
## 2 0~66 127.0609 37.47896
## 3 0~66 127.0609 37.47896
## 4 0~66 127.0609 37.47896
## 5 0~66 127.0609 37.47896
## 6 0~66 127.0609 37.47896
colnames(DF_all2) <- c('municipality', 'address', 'complex', 'exclusivearea', 'contractdate', 'transactionamount', 'floors', 'construction', 'roadname', 'contractyear', 'location_1', 'location_2', 'location_3', 'exclusivearea_section', 'lon', 'lat')
2단계 탐색적 자료 분석
2-2단계 맵지도 분석 (2)
- 1천개 샘플 데이터, 좌표 기반 매매가 가격(라벨)
leaflet(DF_all2) %>%
setView(lng = seoul_lonlat[1],
lat = seoul_lonlat[2],
zoom = 11) %>%
addTiles() %>%
addMarkers(lng = ~lon, lat = ~lat, popup = ~as.character(transactionamount), label = ~as.character(transactionamount)) %>%
addProviderTiles(providers$OpenStreetMap)
## Warning in validateCoords(lng, lat, funcName): Data contains 19 rows with
## either missing or invalid lat/lon values and will be ignored
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.
2단계 탐색적 자료 분석
2-2단계 맵지도 분석 (3)
DF_all3 <- DF_all2 %>% group_by(lon , lat) %>%
summarise(
count = n(),
exclusivearea_section_0_66 = sum(ifelse(exclusivearea_section == '0~66', 1, 0)),
exclusivearea_section_67_100 = sum(ifelse(exclusivearea_section == '67~100', 1, 0)),
exclusivearea_section_100_131 = sum(ifelse(exclusivearea_section == '100~131', 1, 0)),
exclusivearea_section_132_165 = sum(ifelse(exclusivearea_section == '132~165', 1, 0)),
price_25 = quantile(transactionamount, probs= 0.25, na.rm=T),
price_50 = quantile(transactionamount, probs= 0.50, na.rm=T),
price_75 = quantile(transactionamount, probs= 0.75, na.rm=T),
price_stdev = sd(transactionamount, na.rm=T)
)
DF_all3
## # A tibble: 31 x 11
## # Groups: lon [?]
## lon lat count exclusivearea_section_0_66
## <dbl> <dbl> <int> <dbl>
## 1 126.8104 37.55895 1 0
## 2 126.8139 37.57646 36 26
## 3 126.8307 37.57649 7 4
## 4 126.8342 37.56483 48 37
## 5 126.8346 37.55464 14 2
## 6 126.8533 37.55865 30 19
## 7 126.8718 37.55403 22 11
## 8 127.0089 37.63873 5 1
## 9 127.0229 37.52412 4 2
## 10 127.0269 37.62683 27 15
## # ... with 21 more rows, and 7 more variables:
## # exclusivearea_section_67_100 <dbl>,
## # exclusivearea_section_100_131 <dbl>,
## # exclusivearea_section_132_165 <dbl>, price_25 <dbl>, price_50 <dbl>,
## # price_75 <dbl>, price_stdev <dbl>
pal <- colorNumeric(c("red", "green", "blue", "yellow"), 1:10)
basemap <- leaflet(width = "100%", height = "400px") %>%
setView(lng = seoul_lonlat[1],
lat = seoul_lonlat[2],
zoom = 11) %>%
addTiles() %>%
addProviderTiles(providers$OpenStreetMap)
basemap %>%
addMinicharts(
DF_all3$lon, DF_all3$lat,
chartdata = DF_all3$count,
showLabels = TRUE,
width = 45
)
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.
2단계 탐색적 자료 분석
2-2단계 맵지도 분석 (4)
- 1천개 샘플 데이터, 좌표별 exclusivearea 파이 차트
pal <- colorNumeric(c("red", "green", "blue", "yellow"), 1:10)
basemap %>%
addMinicharts(
DF_all3$lon, DF_all3$lat,
type = "pie",
chartdata = DF_all3[, c("exclusivearea_section_0_66", "exclusivearea_section_67_100", "exclusivearea_section_100_131", "exclusivearea_section_132_165")],
colorPalette = pal(c(1,6,9,10)),
width = 45
)
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.
2단계 탐색적 자료 분석
2-2단계 맵지도 분석 (6)
- 1천개 샘플 데이터, 좌표/exclusivearea별 매매가 가격 중앙값 비교
pal <- colorNumeric(c("red", "green", "blue", "yellow"), 1:10)
DF_all4 <- DF_all2 %>% group_by(lon , lat, exclusivearea_section) %>%
summarise(price_50 = quantile(transactionamount, probs= 0.50, na.rm=T))
DF_all4 <- DF_all4 %>% spread(exclusivearea_section,price_50)
DF_all4[is.na(DF_all4)] <- 0
basemap %>%
addMinicharts(
DF_all4$lon, DF_all4$lat,
chartdata = DF_all4[, c("0~66","100~131","132~165","165~197","67~100")],
colorPalette = pal(c(1,6,9,10)),
width = 45
)
## Input to asJSON(keep_vec_names=TRUE) is a named vector. In a future version of jsonlite, this option will not be supported, and named vectors will be translated into arrays instead of objects. If you want JSON object output, please use a named list instead. See ?toJSON.